##### Replication of figures 15-17. Figure 12 is not replicable with the data provided, as it would require merging the two nltrial datasets. ####

# The workspace is deliberately not cleared here: rm(list = ls()) would
# silently delete the caller's objects whenever this script is source()d.
# Every object used below is assigned before it is read, so results do not
# depend on a clean environment. Run the script in a fresh R session if
# isolation is wanted.
library(tidyverse)
library(igraph)
library(ggraph)
library(data.table)
library(stringr)
library(gridExtra)


# Load the two cleaned input tables. The paths are relative, so both files
# must be in the current working directory.
#
# ceelall: the code below reads its columns `source`, `target` and
#          `datenominated` (presumably one row per nomination -- TODO confirm).
# cenl:    the code below reads its columns `ID`, `datenominated`,
#          `dateresponse`, `wave`, `georegion`, `typeinstsimp` and `gender`
#          (presumably one row per person -- TODO confirm).
ceelall <- read_csv("eltrial_clean.csv")
cenl <- read_csv("nltrial_clean.csv")

##### Do we get fewer new nominations over time? Replication figure 15 ######

# For every nominated target, look up the nomination date stored for that
# person in cenl. match() does the ID lookup in one vectorized step and
# returns NA for targets that have no row in cenl.
ceelall$targetnominated <- as.Date(
  cenl$datenominated[match(ceelall$target, cenl$ID)],
  origin = "1970-01-01"
)

# A nomination is flagged as new when its own date equals the date stored for
# the target in cenl. The flag is NA when the target was not found in cenl.
ceelall$newnomination <- ceelall$datenominated == ceelall$targetnominated

# Per nominating person: number of new names and total number of nominations.
# sum() is called without na.rm, so a single NA flag makes `newnames` NA for
# that person.
nominations <- ceelall %>%
  group_by(source) %>%
  summarise(newnames = sum(newnomination), nominations = n()) %>%
  rename(respondentID = source)
nominations$responded <- TRUE

# Add one row for every ID between the smallest and largest nominating ID, so
# that IDs which never appear as a source are kept with responded = FALSE.
id_range <- tibble(
  respondentID = min(nominations$respondentID):max(nominations$respondentID)
)
nominations <- full_join(nominations, id_range, by = "respondentID")
nominations$responded[is.na(nominations$responded)] <- FALSE

nominations$oldnames <- nominations$nominations - nominations$newnames
nominations$pnewnames <- nominations$newnames / nominations$nominations

# Attach each person's response date. Selecting the two columns directly keeps
# the column types intact instead of passing them through an unnamed matrix.
response_dates <- cenl %>% select(respondentID = ID, dateresponse)
nominations <- left_join(nominations, response_dates, by = "respondentID")
nominations$dateclean <- as.Date(
  nominations$dateresponse,
  origin = "1970-01-01"
)

# Diagnostic: how many rows per response date, including rows without a date.
table(nominations$dateclean, useNA = "always")

# Two base-graphics panels side by side; the previous layout is restored
# afterwards so later plots are not affected.
old_par <- par(mfrow = c(1, 2))

plot(nominations$dateclean, nominations$newnames,
     main = "New nominations over time",
     xlab = "Time of response", ylab = "New nominees", pch = 19)
# Linear trend of new nominees on response date.
abline(lm(newnames ~ dateclean, data = nominations), col = "red")

plot(nominations$dateclean, nominations$nominations,
     main = "Nominations over time",
     xlab = "Time of response", ylab = "Nominees", pch = 19)
# Linear trend of total nominees on response date.
abline(lm(nominations ~ dateclean, data = nominations), col = "red")

par(old_par)

##### Does this converge / diversify as Gile and Hancock suggest? #####
#### replicating figures 16 and 17 ####

# Recode a numeric wave vector to character, merging every wave above 7 into
# the single label "8+". Missing waves stay NA.
collapse_late_waves <- function(wave) {
  if_else(wave > 7, "8+", as.character(wave))
}

# Build a 100%-stacked bar chart showing, for each value of `wave` in `data`,
# the share of rows falling into each level of `attribute`.
#   data      - data frame with a `wave` column and the `attribute` column
#   attribute - unquoted column name used for the fill colour
#   title     - plot title
# Rows where `attribute` is NA are left out. Returns a ggplot object.
plot_share_by_wave <- function(data, attribute, title) {
  counts <- data %>%
    count(wave, {{ attribute }}) %>%
    filter(!is.na({{ attribute }}))

  ggplot(counts, aes(fill = {{ attribute }}, y = n, x = wave)) +
    geom_col(position = "fill") +
    xlab("wave") +
    ggtitle(title)
}

#### Composition of the respondents in each wave - figure 16 ####

# Problem: waves 8+ is relatively small, so lets collapse those
table(cenl$wave, useNA = "always")
cenl2 <- cenl
cenl2$wave <- collapse_late_waves(cenl2$wave)

p1 <- plot_share_by_wave(
  cenl2, georegion,
  "Composition of country of work by wave of respondents"
)
p2 <- plot_share_by_wave(
  cenl2, typeinstsimp,
  "Composition of place of work by wave of respondents"
)
p3 <- plot_share_by_wave(
  cenl2, gender,
  "Gender distribution by wave of respondents"
)

grid.arrange(p1, p2, p3, ncol = 3, top = "Composition of experts in each wave")

#### Same, but for people nominated by the different waves instead - figure 17 #####

# Step 1: join the attributes from cenl onto column 2 of the nomination table.
# NOTE(review): the renames below are positional; they assume column 1 of
# ceelall is the nominating person and column 2 the nominated person --
# TODO confirm against the CSV.
dtnom <- ceelall
names(dtnom)[2] <- "ID"
dtnom <- left_join(
  dtnom,
  cenl %>% select(ID, georegion, typeinstsimp, gender),
  by = "ID"
)

# Step 2: drop any existing wave column, then join the wave from cenl onto
# column 1 instead, so each row carries the wave of the column-1 person.
dtnom$wave <- NULL
names(dtnom)[2] <- "targetID"
names(dtnom)[1] <- "ID"
dtnom <- left_join(dtnom, cenl %>% select(ID, wave), by = "ID")

table(dtnom$wave, useNA = "always")
dtnom$wave <- collapse_late_waves(dtnom$wave)

p1 <- plot_share_by_wave(
  dtnom, georegion,
  "Country of work of experts nominated by experts in each wave"
)
p2 <- plot_share_by_wave(
  dtnom, typeinstsimp,
  "Type of work institution of experts nominated by experts in each wave"
)
p3 <- plot_share_by_wave(
  dtnom, gender,
  "Gender of experts nominated by experts in each wave"
)

grid.arrange(p1, p2, p3, ncol = 3, top = "Composition of experts nominated by each wave")



